home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The Atari Compendium
/
The Atari Compendium (Toad Computers) (1994).iso
/
files
/
umich
/
tex
/
lgrind.zoo
/
regexp.c
< prev
next >
Wrap
C/C++ Source or Header
|
1992-09-28
|
15KB
|
682 lines
#ifndef lint
static char *sccsid="@(#)regexp.c 1.2 (LBL) 4/12/85";
static char Version[] =
"$Id: regexp.c,v 1.2 91/10/01 00:30:20 gvr Exp $";
#endif
/*
* Regular expression matching routines for lgrind/tgrind/tfontedpr.
*
* These routines were written by Dave Presotto (I think) for vgrind.
* Minor mods & attempts to improve performance by Van Jacobson
* (van@@lbl-rtsg) and Chris Torek (chris@@maryland).
*
* Modifications.
* --------------
* Sep 91 George V Reilly Fixed up some bogus uses of @NIL@ and @NULL@.
* 30 Mar 85 Van & Chris Changed @expmatch()@ to return pointer to
* start of what was matched in addition to
* pointer to match end. Several changes to
* improve performance (too numerous to mention).
* 11 Dec 84 Dave Presotto Written.
*/
#include <stdio.h>
#include <malloc.h>
#include <ctype.h>
typedef int boolean;
#define TRUE 1
#define FALSE 0
#define makelower(c) (isupper((c)) ? tolower((c)) : (c))
void expconv(); /* forward declaration */
int (*re_strncmp)(); /* function used by @expmatch()@ to compare
* strings. The caller should make it point to
* @strncmp()@ if case is significant &
* @lc_strncmp()@ otherwise.
*/
/* @lc_strncmp()@ --- like @strncmp()@ except that we convert the
* first string to lower case before comparing.
*/
int
lc_strncmp(s1, s2, len)
register char *s1,*s2;
register int len;
{
while (len-- > 0)
if (*s2 - makelower(*s1))
return 1;
else
s2++, s1++;
return 0;
}
/* The following routine converts an irregular expression to
* internal format.
*
* Either meta symbols (%|\a|%, %|\d|%, or %|\p|%) or character strings
* or operations (alternation or parenthesizing) can be specified.
* Each starts with a descriptor byte. The descriptor byte
* has @STR@ set for strings, @META@ set for meta symbols,
* and @OPER@ set for operations. The descriptor byte can also have
* the @OPT@ bit set if the object defined is optional. Also @ALT@
* can be set to indicate an alternation.
*
* For metasymbols, the byte following the descriptor byte identities
* the meta symbol (containing an ASCII @'a'@, @'d'@, @'p'@, @'|'@,
* or @'('@). For strings, the byte after the descriptor is a
* character count for the string:
*@
* meta symbols := descriptor
* symbol
*
* strings := descriptor
* character count
* the string
*
* operations := descriptor
* symbol
* character count@
*/
/*
* handy macros for accessing parts of match blocks
*/
#define MSYM(A) (*(A+1)) /* symbol in a meta symbol block */
#define MNEXT(A) (A+2) /* character following a metasymbol block */
#define OSYM(A) (*(A+1)) /* symbol in an operation block */
#define OCNT(A) (*(A+2)) /* character count */
#define ONEXT(A) (A+3) /* next character after the operation */
#define OPTR(A) (A+*(A+2)) /* place pointed to by the operator */
#define SCNT(A) (*(A+1)) /* byte count of a string */
#define SSTR(A) (A+2) /* address of the string */
#define SNEXT(A) (A+2+*(A+1)) /* character following the string */
/*
* bit flags in the descriptor
*/
#define OPT 1
#define STR 2
#define META 4
#define ALT 8
#define OPER 16
char *ure; /* pointer current position in unconverted exp */
char *ccre; /* pointer to current position in converted exp */
/*
* Convert an irregular expression to the internal form
*/
char *
convexp(re)
char *re; /* unconverted irregular expression */
{
register char *cre; /* pointer to converted regular expression */
/* allocate room for the converted expression */
if (re == NULL)
return NULL;
if (*re == '\0')
return NULL;
cre = malloc(4 * strlen(re) + 3);
ccre = cre;
ure = re;
/* start the conversion with a %|\a|% */
*cre = META | OPT;
MSYM(cre) = 'a';
ccre = MNEXT(cre);
/* start the conversion (it's recursive) */
expconv();
*ccre = 0;
return cre;
}
/*
* Actually do the conversion
*/
static void
expconv()
{
register char *cs; /* pointer to current symbol in converted exp */
register char c; /* character being processed */
register char *acs; /* pointer to last alternate */
register int temp;
/* let the conversion begin */
acs = NULL;
cs = NULL;
while (*ure != '\0') {
switch (c = *ure++) {
case '\\':
switch (c = *ure++) {
/* escaped characters are just characters */
default:
if (cs == NULL || (*cs & STR) == 0) {
cs = ccre;
*cs = STR;
SCNT(cs) = 1;
ccre += 2;
} else
SCNT(cs)++;
*ccre++ = c;
break;
/* normal(?) metacharacters */
case 'a':
case 'd':
case 'e':
case 'p':
if (acs != NULL && acs != cs) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
cs = ccre;
*cs = META;
MSYM(cs) = c;
ccre = MNEXT(cs);
break;
}
break;
/* just put the symbol in */
case '^':
case '$':
if (acs != NULL && acs != cs) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
cs = ccre;
*cs = META;
MSYM(cs) = c;
ccre = MNEXT(cs);
break;
/* mark the last match sequence as optional */
case '?':
if (cs)
*cs = *cs | OPT;
break;
/* recurse and define a subexpression */
case '(':
if (acs != NULL && acs != cs) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
cs = ccre;
*cs = OPER;
OSYM(cs) = '(';
ccre = ONEXT(cs);
expconv();
OCNT(cs) = ccre - cs; /* offset to next symbol */
break;
/* return from a recursion */
case ')':
if (acs != NULL) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
cs = ccre;
*cs = META;
MSYM(cs) = c;
ccre = MNEXT(cs);
return;
/* Mark the last match sequence as having an alternate. */
/* The third byte will contain an offset to jump over the */
/* alternate match in case the first did not fail */
case '|':
if (acs != NULL && acs != cs)
OCNT(ccre) = ccre - acs; /* make a back pointer */
else
OCNT(ccre) = 0;
*cs |= ALT;
cs = ccre;
*cs = OPER;
OSYM(cs) = '|';
ccre = ONEXT(cs);
acs = cs; /* remember that the pointer is to be filled */
break;
/* if it's not a metasymbol, just build a character string */
default:
if (cs == NULL || (*cs & STR) == 0) {
cs = ccre;
*cs = STR;
SCNT(cs) = 1;
ccre = SSTR(cs);
} else
SCNT(cs)++;
*ccre++ = c;
break;
}
}
if (acs != NULL) {
do {
temp = OCNT(acs);
OCNT(acs) = ccre - acs;
acs -= temp;
} while (temp != 0);
acs = NULL;
}
return;
} /* end of @expconv()@ */
/*
* The following routine recognises an irregular expression
* with the following special characters:
*
* %|\?|% means last match was optional
* %|\a|% matches any number of characters
* %|\d|% matches any number of spaces and tabs
* %|\p|% matches any number of alphanumeric characters.
* The characters matched will be copied into
* the area pointed to by @mstring@.
* %|\||% alternation
* %|\( \)|% grouping used mostly for alternation and
* optionality
*
* The irregular expression must be translated to internal form
* prior to calling this routine
*
* The value returned is the pointer to the last character matched.
* If @strtptr@ is non-@NULL@, a pointer to the start of the string matched
* (excluding %|\a|% matches) will be returned at @*strtptr@.
* If @mstring@ is non-@NULL@, the string matched by a %|\p|% will be copied
* into @mstring@.
*/
boolean _escaped; /* true if we are currently @_escaped@ */
char *_start; /* start of string. Set in @putScp()@. */
char *
expmatch(s, re, strtptr, mstring)
register char *s; /* string to check for a match in */
register char *re; /* a conv